#!/usr/bin/env python

import os
import sys
import gzip
import bsddb
import cStringIO

def parse_origin(f):
    """Extract the sequence text of the ORIGIN section from a record.

    The input looks like a GenBank-style flat file (assumption based on
    the ``ORIGIN`` / ``//`` markers -- confirm against the data source).

    Args:
        f: An iterable of text lines (e.g. an open file object).

    Returns:
        The concatenation, with all whitespace removed, of every
        whitespace-separated token except the first on each line found
        between the line starting with ``ORIGIN`` and the line starting
        with ``//``.  An empty string is returned when there is no
        ``ORIGIN`` line or when the input ends before a ``//`` line.
    """
    rows = iter(f)

    # Phase 1: advance to the ORIGIN header; the header itself carries
    # no sequence data and is discarded.
    for header in rows:
        if header.startswith('ORIGIN'):
            break
    else:
        return ''

    # Phase 2: gather sequence tokens until the record terminator.
    pieces = []
    for row in rows:
        if row.startswith('//'):
            return ''.join(pieces)
        # The first token on each line is dropped (presumably a position
        # counter); the rest are sequence chunks.
        pieces.extend(row.strip().split()[1:])

    # Input ran out without a terminator: treat the record as unusable.
    return ''

if __name__ == "__main__":
    # Usage: <script> ORGS_DIR GENOM_DB
    #   ORGS_DIR  directory whose entries are gzip-compressed records
    #   GENOM_DB  path of the bsddb B-tree file that receives the
    #             extracted sequences, keyed by directory entry name
    if len(sys.argv) < 3:
        sys.exit('Usage: %s ORGS_DIR GENOM_DB' % sys.argv[0])

    orgs_dir = sys.argv[1]
    genom_db_fname = sys.argv[2]

    cnt_all = 0
    cnt_success = 0
    genom_db = bsddb.btopen(genom_db_fname, 'w')
    try:
        for key in os.listdir(orgs_dir):
            fobj = gzip.open(os.path.join(orgs_dir, key))
            cnt_all += 1
            try:
                origin = parse_origin(fobj)
            finally:
                # Release the gzip handle even if parsing fails.
                fobj.close()
            if not origin:
                sys.stderr.write('Empty origin for locus %s\n' % key)
                continue
            genom_db[key] = origin
            cnt_success += 1

        genom_db.sync()
    finally:
        # Always close the database so records already written are not
        # left in an unflushed handle when an error aborts the run.
        genom_db.close()

    sys.stdout.write("Done! (%s/%s)\n" % (cnt_success, cnt_all))

